{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "# avg last freeze date\n",
    "import pandas as pd\n",
    "import noaa_datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Success!\n"
     ]
    }
   ],
   "source": [
    "# pull down master data\n",
    "stations = noaa_datasets.Stations()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Success!\n"
     ]
    }
   ],
   "source": [
    "inventory = noaa_datasets.Inventory(stations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# figure out best datasets to use for our requirement\n",
    "# these are ones with the lastyear = 2019\n",
    "# and at least 30 years for YearCount\n",
    "# also, we're only looking at low temps, which is TMIN\n",
    "df = inventory.df\n",
    "df = df[ df['Element']=='TMIN' ]\n",
    "df = df[ df['YearCount']>30 ]\n",
    "df = df[ df['LastYear']==2019 ]\n",
    "\n",
    "# whole world have 8,000+ results\n",
    "# let's filter to just the united states, still 4,311...\n",
    "df = df[ df['CountryCode']=='US' ]\n",
    "df = df[ df['State']=='NE' ]\n",
    "\n",
    "# let's pick a random 100 of these\n",
    "df_samples = df.sample(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Retrieving USC00257070.dly\n",
      "Transposing USC00257070 dataset\n",
      "Applying data prep operations\n",
      "Success!\n",
      "Retrieving USC00250435.dly\n",
      "Transposing USC00250435 dataset\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Nate\\Anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py:741: UserWarning: Duplicate names specified. This will raise an error in the future.\n",
      "  return _read(filepath_or_buffer, kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applying data prep operations\n",
      "Success!\n",
      "Retrieving USC00253605.dly\n",
      "Transposing USC00253605 dataset\n",
      "Applying data prep operations\n",
      "Success!\n"
     ]
    }
   ],
   "source": [
    "# now we need to pull the daily file information for \n",
    "# each of these 100 records\n",
    "# this will take some time, which is why we reduced\n",
    "# to just 100 in the first place!\n",
    "\n",
    "# let's try the process with just a single station\n",
    "# then we can iterate and concat\n",
    "def c_to_f(row):\n",
    "    # (0°C × 9/5) + 32 = 32°F\n",
    "    return ( ((((row['VALUE']/10) * 9) / 5) + 32))\n",
    "\n",
    "full_df = pd.DataFrame()\n",
    "\n",
    "for station in df_samples['StationID']:\n",
    "    df_daily = noaa_datasets.DailyFile(station)\n",
    "    df_daily.df = df_daily.df[ df_daily.df['ELEMENT']=='TMIN' ]\n",
    "    df_daily.df['Low Temp (f)'] = df_daily.df.apply(c_to_f, axis=1)\n",
    "    df_daily.df = df_daily.df[ df_daily.df['YEAR']>2010 ]\n",
    "    full_df = pd.concat([full_df, df_daily.df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_df = pd.merge(full_df, stations.df, left_on='StationID', right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_df.to_csv('NOAA Low Daily Temperatures Analysis.csv', sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
